
* Project folders used throughout the script: data, temp and results
global dat "\\micro.intra\Projekt\P0624$\P0624_Gem\Politisk Geografi\Data"
global temp "\\micro.intra\Projekt\P0624$\P0624_Gem\Politisk Geografi\temp"
global res "\\micro.intra\Projekt\P0624$\P0624_Gem\Politisk Geografi\Results"

* Close a log that may still be open (the error is ignored when none is),
* then open a fresh log for this run, overwriting the previous one
capture log close
log using "$res\log\merge.log", replace

* New variables are stored as double from here on
set type double

* Person-year district file is the master; attach the lisa_togeo records.
* Master rows without a match are kept.
use "$dat\dist p_id year.dta", clear
joinby p_id year using "$dat\lisa_togeo.dta", unmatched(master)
drop _merge

* Copy of year under the key name used when joining the politician file
generate valar = year

* Attach the politician data; unmatched rows from both sides are retained.
* The _merge variable created here is not dropped explicitly; it disappears
* with the keep statement that follows.
joinby p_id valar using "$dat\mun politician data", unmatched(both)
	

	

* Restrict to the variables needed for the district aggregation
keep p_id- dist inv1g_ejn inv1g_eje valar vald parti_initial age inc educ_year child*

* bef = population in the district, adbef = adult population
* (each person contributes 1; the counts are summed later)
generate bef = 1 if dist != .
generate adbef = 1 if inc != .

* First election year in which the person was elected (vald==1);
* fy_vald is missing on rows where vald is not 1
by p_id, sort: egen fy_vald = min(valar) if vald == 1
generate vald_first = valar == fy_vald

* Keep election years from 2000 onwards (rows with missing valar stay, as before)
keep if valar >= 2000

* Lower-case party code used for the party comparisons
generate parti_low = lower(parti_initial)

*** parties: b = Sweden Democrats (SD), m = Moderates, c = Centre Party, k = Christian Democrats (KD),
*** f = Liberals, g = Green Party (MP), s = Social Democrats, v = Left Party
* NOTE(review): the loop below also uses code "l", which is not in the legend above - confirm its meaning

* generate, for each party, an indicator for being elected (vald==1) for that party
foreach parti in b m c k f l g s v {
	gen vald_`parti' = vald==1 & parti_low=="`parti'"
}

* elected for a party in the right bloc (c, m, k, f) or the left bloc (s, g, v)
gen vald_right = vald==1 & (parti_low=="c" | parti_low=="m" | parti_low=="k" | parti_low=="f")
gen vald_left = vald==1 & (parti_low=="s" | parti_low=="g" | parti_low=="v")

* income percentile (1-100) within each year
fasterxtile pct=inc , by(year) n(100)

* Stata treats a missing value as larger than any number, so an unguarded
* "age>=65" or "educ_year>13" is TRUE when the variable is missing. The
* explicit !missing() guards keep persons with unknown age or education
* from being counted as retired or highly educated (highq already had such a guard).
gen retired = age>=65 & !missing(age)
gen highq = pct>75 & pct!=.
gen highed = educ_year>13 & !missing(educ_year)

* em is the complement of inv1g_eje; it stays missing where inv1g_eje is missing
gen em = 1-inv1g_eje

 

 * Aggregate the person-level rows to one row per municipality (llkk), district and year
 collapse (sum) bef adbef vald* high* em child* retired, by(llkk dist year)

 * Remove rows without a municipality code, the district coded 9999,
 * and rows with missing adult population.
 * NOTE(review): collapse (sum) returns 0 rather than missing when every input
 * is missing, so the adbef==. condition probably never holds - confirm whether
 * adbef==0 was intended.
 drop if llkk == . | dist == 9999 | adbef == .

 * children not included in calculation of bef, so add them here
 replace bef = bef + child_7_15

 sort llkk dist year

	
* District-level election results
joinby dist llkk year using "$dat\dist_election.dta", unmatched(master)
drop _merge

* District-level prices; the merge indicator is kept under the name _mergeprice
joinby dist llkk year using "$dat\prices_precinct_ep.dta", unmatched(master) _merge(_mergeprice)

* Fall back to the calendar year where no election period is available
replace electionperiod = year if electionperiod == .
generate ln_price_sqm_old = ln(price_sqm_old)

* Municipality-level competition data, keyed on the election period
joinby llkk electionperiod using "$dat\mun competition.dta", unmatched(master)
drop _merge

* Municipality-level data keyed on election year.
* NOTE(review): valar is not among the variables produced by the collapse in this
* script, so it presumably arrives with one of the files merged above - confirm.
joinby llkk valar using "$dat\kommundata.dta", unmatched(master)
drop _merge


 
	* Build the majority/minority sums from a table of majority flags and the
	* party letter each flag belongs to (same term order as a hand-written sum)
	local majflags mmaj2 cmaj2 fpmaj2 kdmaj2 mpmaj2 smaj2 vmaj2
	local majletters m c f k g s v
	local vald_maj_expr
	local votes_maj_expr
	local votes_min_expr
	forvalues i = 1/7 {
		local flag : word `i' of `majflags'
		local lt : word `i' of `majletters'
		if `i' > 1 {
			local vald_maj_expr `vald_maj_expr' +
			local votes_maj_expr `votes_maj_expr' +
			local votes_min_expr `votes_min_expr' +
		}
		local vald_maj_expr `vald_maj_expr' `flag'*vald_`lt'
		local votes_maj_expr `votes_maj_expr' `flag'*rostm`lt'
		local votes_min_expr `votes_min_expr' (1-`flag')*rostm`lt'
	}

	* Elected politicians from majority parties; the minority is the remainder of left + right
	gen vald_maj = `vald_maj_expr'
	gen vald_min = (vald_right+vald_left)-vald_maj

	* Votes by bloc
	gen votes_right = rostmm + rostmc + rostmk + rostmf
	gen votes_left = rostms + rostmv + rostmg

	* Votes for majority and for minority parties
	gen votes_maj = `votes_maj_expr'
	gen votes_min = `votes_min_expr'

	* Total votes, and adults minus votes
	gen votes = rostm_gil
	gen non_votes = adbef-votes


* Permits
joinby dist llkk year using "$dat\permits.dta", unmatched(master)
drop _merge

* Tenure (the merge indicator uses joinby's default name, _merge)
joinby llkk dist year using "$dat\tenure_precinct_ep.dta", unmatched(master)
drop _merge
generate own = owning_house + owning_coop

* Property data; the merge indicator is kept as _mergefast
joinby dist llkk year using "$dat\property.dta", unmatched(master) _merge(_mergefast)

* Sum of the four b_area_* components; rowtotal treats missing components as 0
egen b_area_res = rowtotal(b_area_single b_area_chain b_area_two b_area_multi)

* Three neighbour files, each joined on district, year and municipality
joinby dist year llkk using "$dat\coll_neighbours.dta", unmatched(master)
drop _merge
joinby dist year llkk using "$dat\timesame_neighbours.dta", unmatched(master)
drop _merge
joinby dist year llkk using "$dat\relatives_neighbours", unmatched(master)
drop _merge

* Numeric identifier for each municipality-year combination
egen llkk_year = group(llkk year)


 
*** calculate share of group living in districts ***

* For every listed variable: kom_<var> is the municipality-year total and
* sh_<var> is the district's share of that total
local share_vars non_votes votes bef adbef  highed em  child_7_15 retired highq  votes_left votes_right   ///
	area_single area_multi area_single_lag area_multi_lag vald- vald_left vald_maj vald_min votes_maj votes_min   b_area_single b_area_multi  b_multi b_single  pop own
foreach v of varlist `share_vars' {
	by llkk year (dist), sort: egen kom_`v' = sum(`v')
	generate sh_`v' = `v' / kom_`v'
}

* Bloc votes: municipality-year total and the district's share of it
foreach bloc in left right {
	by llkk year (dist), sort: egen rost_kom_`bloc' = sum(votes_`bloc')
	generate sh_mv_`bloc' = votes_`bloc' / rost_kom_`bloc'
}

* Party votes: municipality-year total and the district's share of it
foreach party in b m c k f  g s v {
	by llkk year (dist), sort: egen kom_votes_`party' = sum(rostm`party')
	generate sh_votes_`party' = rostm`party' / kom_votes_`party'
}
	

*** calculate concentration ***

* Share of the bloc's elected politicians minus share of the bloc's voters in the district
foreach bloc in left right {
	generate diff_vald_`bloc'_vs = sh_vald_`bloc' - sh_mv_`bloc'
}

* Share of voters minus share of non-voters
generate diff_voters_bef = sh_votes - sh_non_votes

* Share of the bloc's votes minus the district's share of all other votes
foreach grp in votes_left votes_right {
	generate diff_`grp'_bef = sh_`grp' - ((votes-`grp')/(kom_votes-kom_`grp'))
}

* Same per party, with all other votes (bef) or total population less the
* party's votes (altbef) as the comparison group
foreach party in b m c k f  g s v {
	generate diff_votes_`party'_bef = sh_votes_`party' - ((votes-rostm`party')/(kom_votes-kom_votes_`party'))
	generate diff_votes_`party'_altbef = sh_votes_`party' - ((bef-rostm`party')/(kom_bef-kom_votes_`party'))
}

* Share of the group minus the district's share of everyone else in the
* reference population: pop for own, bef for child_7_15, adbef otherwise
foreach grp in highed em  child_7_15 retired highq own {
	local t adbef
	if "`grp'" == "own" local t pop
	if "`grp'" == "child_7_15" local t bef
	generate diff_`grp'_bef = sh_`grp' - ((`t'-`grp')/(kom_`t'-kom_`grp'))
}

* Share of the variable minus the district's share of the adult population
foreach v of varlist area_single area_multi area_single_lag area_multi_lag vald- vald_left vald_maj- vald_min votes_maj votes_min b_area_multi b_area_single   b_multi b_single {
	generate diff_`v'_bef = sh_`v' - sh_adbef
}
	
	

*** calculate share relative to population in district ***

* Denominator is pop for own and adbef for every other variable
foreach grp in vald em highq highed own {
	local t = cond("`grp'" == "own", "pop", "adbef")
	generate `grp'_sh = `grp' / `t'
}

* Population density; where it is missing, use the mean over the
* municipality-year's districts instead
generate pop_dens = bef / land_area
tempvar mun_dens
by llkk year (dist), sort: egen `mun_dens' = mean(pop_dens)
replace pop_dens = `mun_dens' if mi(pop_dens)  // missing for 90 obs
drop `mun_dens'

* Floor area per inhabitant
generate floorarea_pc = b_area_res / bef

* Per party: elected politicians per adult, and the vote share taken from vsm_<party>
foreach party in b m c k f l g s v {
	generate vald_`party'_sh = vald_`party' / adbef
	generate votes_`party'_sh = vsm_`party'
}

*** calculate difference to municipality mean ***

* The variable ranges below depend on the creation order of the variables above
foreach v of varlist vald_b_sh- votes_v_sh  vald_sh- floorarea_pc ln_price_sqm_old {
	by llkk year (dist), sort: egen `v'_munmean = mean(`v')
	generate `v'_mdiff = `v' - `v'_munmean
}
	


* Set missing lagged-area differences to 0
replace diff_area_multi_lag_bef = 0 if diff_area_multi_lag_bef==.
replace diff_area_single_lag_bef = 0 if diff_area_single_lag_bef==.

* Majority minus minority: share of elected politicians, and share of votes
gen diff_maj_min = sh_vald_maj - sh_vald_min
gen diffvs_maj_min = sh_votes_maj - sh_votes_min


*** define close elections ***
gen abs_diff_maj_min = abs(diff_maj_min)
bysort year llkk (dist): egen sum_abs_diff_maj_min = sum(abs_diff_maj_min)
bysort year llkk (dist): egen mean_abs_diff_maj_min = mean(abs_diff_maj_min)

* Number of districts in the municipality-year
bysort llkk year (dist): egen dist_count = count(dist)
gen abddiff_vald_left_right = abs(sh_vald_left - sh_vald_right)
gen diff_vald_left_right = sh_vald_left - sh_vald_right

* Weight that gives every municipality-year the same total weight,
* plus an integer version of it (scaled by 10000)
gen w_dist_count = 1/dist_count
gen f_dist_count = int(w_dist_count*10000)

* Close-election indicators; municipality-years with a single district are never close
gen close = (abs(dist_h_simp) <.05 | abs(dist_v_simp) <.05) & dist_count!=1
gen close_b = (abs(dist_h_simp) <.05 & abs(dist_v_simp) <.05) & dist_count!=1
gen vclose = (abs(dist_h_simp) <.025 | abs(dist_v_simp) <.025) & dist_count!=1
gen block_ch = block_rule<3 if block_rule!=. & dist_count!=1
gen cl_bl = close==1 & block_ch==1
gen bl = block_ch==1

* "replace all = ..." only works if a variable is named exactly "all". If none
* is, Stata's abbreviation rules would resolve "all" to any unique variable
* whose name starts with "all" (for example allamaj2) and silently overwrite
* it, or stop with an ambiguity error. Check for an exact match first and
* create the variable when it does not exist yet.
capture confirm variable all, exact
if _rc {
	gen all = dist_count!=1
}
else {
	replace all = dist_count!=1
}

* Quartiles and deciles of the majority-minority vote-share difference within municipality-year
fasterxtile majvs_4q = diffvs_maj_min, nq(4) by(llkk year)
fasterxtile majvs_10q = diffvs_maj_min, nq(10) by(llkk year)

* above / below median size (weighted median of kom_adbef)
sum kom_adbef [aw=w_dist_count], d
gen ab_med = kom_adbef > r(p50) if kom_adbef!=.
gen bel_med = kom_adbef <= r(p50) if kom_adbef!=.
gen ab_med_cl_bl = cl_bl==1 & ab_med==1
gen bel_med_cl_bl = cl_bl==1 & bel_med==1

* Remove the district coded 9999
drop if dist==9999


* standardize variables: for each election year, subtract the weighted mean and
* divide by the weighted standard deviation computed within that year
* NOTE(review): diff_b_area_multi and diff_b_area_single are not created under
* exactly these names in the visible script; presumably they resolve through
* variable abbreviation to the corresponding *_bef variables - confirm.
foreach var of varlist diff_em_bef diff_own_bef  diff_highed_bef diff_highq_bef vald_b_sh_mdiff- votes_v_sh_mdiff diff_voters_bef  ln_price_sqm_old_mdiff ///
	diff_vald_first_bef  diff_vald_left_vs  diff_vald_right_vs diff_vald_right_bef  diff_vald_left_bef   ///
	floorarea_pc_mdiff  diff_vald_bef  diff_maj_min  diff_vald_left_right ///
	diff_b_area_multi diff_b_area_single pop_dens_mdiff   diff_retired_bef diff_child_7_15_bef  diff_area_single_lag_bef  diff_area_multi_lag_bef {
	foreach yr of numlist 2002 2006 2010 {
		sum `var' [aweight=w_dist_count] if year==`yr'
		if `yr' == 2002 {
			* first year creates the variable; later years fill in their own rows
			g std_`var' = (`var' - r(mean))/ r(sd) if year==`yr'
		}
		else {
			replace std_`var' = (`var' - r(mean))/ r(sd) if year==`yr'
		}
	}
}


* Order the observations by municipality, district and year before saving
sort llkk dist year
 


* Keep only the variables that go into the final data set.
* NOTE(review): several names in this list (e.g. ab_med_c, bel_med_c,
* diff_b_area_multi, std_diff_b_area_multi) are not created under exactly these
* names in the visible script; presumably they resolve through variable
* abbreviation - confirm. Others (e.g. diffalt_maj_min_vald_*, vanster_maj,
* hoger_maj) are not generated here and presumably come from the merged files.
keep year llkk llkk_year dist diff_vald_right_vs diff_vald_left_vs diff_vald_bef diff_vald_first_bef diff_vald_left_bef diff_vald_right_bef ///
diff_vald_m_bef diff_vald_c_bef diff_vald_k_bef diff_vald_f_bef diff_vald_g_bef diff_vald_s_bef diff_vald_v_bef ///
diff_highq_bef diff_highed_bef diff_own_bef diff_em_bef  diff_retired_bef diff_child_7_15_bef diff_voters_bef diff_b_area_multi diff_b_area_single  pop_dens_mdiff floorarea_pc_mdiff diff_area_single_lag_bef  diff_area_multi_lag_bef ///
std_diff_highq_bef std_diff_highed_bef std_diff_own_bef std_diff_em_bef  std_diff_retired_bef std_diff_child_7_15_bef  std_diff_voters_bef std_diff_b_area_multi std_diff_b_area_single std_ln_price_sqm_old_mdiff ///
std_pop_dens_mdiff std_floorarea_pc_mdiff  std_diff_area_single_lag_bef  std_diff_area_multi_lag_bef ///
close ab_med ab_med_cl_bl ab_med_c  bel_med bel_med_cl_bl bel_med_c  bl cl_bl allamaj2 mmaj2 cmaj2 fpmaj2 kdmaj2 mpmaj2 smaj2 vmaj2 ///
diff_votes_m_altbef diff_votes_c_altbef diff_votes_k_altbef diff_votes_f_altbef diff_votes_g_altbef diff_votes_s_altbef diff_votes_v_altbef diff_votes_b_altbef ///
diff_area_multi_bef area_multi b_area_multi diff_b_area_multi diff_area_single_bef area_single b_area_single diff_b_area_single_bef w_dist_count ///
diff_votes_m_bef diff_votes_c_bef diff_votes_k_bef diff_votes_f_bef diff_votes_g_bef diff_votes_s_bef diff_votes_v_bef diff_maj_min ///
diffvs_maj_min majvs_4q majvs_10q dist_h_simp dist_v_simp ///
diffalt_maj_min_vald_ab15sm diffalt_maj_min_vald_bel15sm diffalt_maj_min_vald_rel diffalt_maj_min_vald_norell diffalt_maj_min_vald_coll diffalt_maj_min_vald_nocoll  bef highq em highed own retired child_7_15 ///
votes pop_dens floorarea_pc ln_price_sqm_old  vald_maj vald_min vanster_maj hoger_maj diff_vald_left_right adbef f_dist_count dist_count ///
votes_m_sh_mdiff votes_c_sh_mdiff votes_k_sh_mdiff votes_f_sh_mdiff  votes_g_sh_mdiff votes_s_sh_mdiff votes_v_sh_mdiff ///
vald_m_sh_mdiff vald_c_sh_mdiff vald_k_sh_mdiff vald_f_sh_mdiff  vald_g_sh_mdiff vald_s_sh_mdiff vald_v_sh_mdiff ///
 em_sh_mdiff highq_sh_mdiff highed_sh_mdiff own_sh_mdiff vald_sh_mdiff diff_vald_maj_bef


* Write the final data set to the data folder, overwriting an existing file
save "$dat\main.dta", replace


	  

* Close the log opened at the top of the script
log close